# Request URL: https://s.weibo.com/top/summary?cate=realtimehot
import time

import requests
import parsel # 解析数据模块
# Page to scrape: Weibo "top summary" with cate=realtimehot.
url = 'https://s.weibo.com/top/summary?cate=realtimehot'

# Cookie values sent with the request. This dict is the single source of
# truth; the Cookie header below is generated from it so the two cannot
# drift apart.
cookies = {
    'SUB': '_2AkMQv044f8NxqwFRmfATy2rrboR3zwrEieKm47_jJRMxHRl-yT9kqlwStRB6Oz9g17hXYSConG_Y1JABdqyq2prY32uB',
    'SUBP': '0033WrSXqPxfM72-Ws9jqgMF55529P9D9WFCi89CvkD3RLNVeKjsO0XS',
}

headers = {
    # Presents the client as Safari on an iPhone.
    'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1',
    # Serialise the cookies as "name=value; name=value", in the order they
    # are declared above.
    'cookie': '; '.join('='.join(pair) for pair in cookies.items()),
}

# Download the page and turn the HTML text into a parsel selector tree.
# requests applies no timeout by default, so one is set explicitly to keep
# the script from hanging on a stalled connection; raise_for_status() makes
# an HTTP error response fail loudly instead of being parsed as if it were
# the ranking page.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
selector = parsel.Selector(response.text)
# Every <li> under a <ul> inside a <section>; each is treated as one row.
lis = selector.css('section ul li')
# Define the rank counter (only the disabled draft below numbers rows itself)

# for ul in uls:
#     spans = []
#     title = ul.css('li a span::text').extract()
#     hot = ul.css('li a span em::text').extract()
#     rank = 1
#     for t,h in zip(title,hot):
#
#         if t == ' ':
#             continue
#         dict = {
#             '排名':rank,
#             '标题':t,
#             '热度':h,
#         }
#         rank += 1
#         print(dict)
# Stamp the output with the local time at which the list was collected.
print('采集时间：', time.strftime('%Y-%m-%d %H:%M:%S'))
for li in lis:
    # extract_first() returns None when the element has no matching text.
    rank = li.css('strong::text').extract_first()
    title = li.css('span::text').extract_first()
    hot = li.css('em::text').extract_first()
    # An entry without a <strong> rank is labelled 'top' for both its rank
    # and its heat value (any <em> text it had is overwritten).
    if rank is None:
        rank = 'top'
        hot = 'top'
    # Named `row` rather than `dict` so the builtin is not shadowed.
    row = {
        '排名': rank,
        '标题': title,
        '热度': hot,
    }
    print(row)
